library(SingleCellExperiment)
library(scater)
library(scran)
library(BiocParallel)
sce <- readRDS("data/sce_annotated.rds")Subset to T cells
sce_big <- sce
sce <- sce[, sce$label_immgen == "T cells"]
sce## class: SingleCellExperiment
## dim: 32286 3225
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(32286): ENSMUSG00000051951 ENSMUSG00000089699 ...
## ENSMUSG00000095041 YFP
## rowData names(3): ID Symbol Type
## colnames: NULL
## colData names(6): Sample Barcode ... label_immgen
## label_immgen_collapsed
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
Rerun UMAP and clustering
#--- variance-modelling ---#
set.seed(00010101)
dec.sce <- modelGeneVarByPoisson(sce)
top.sce <- getTopHVGs(dec.sce, prop=0.1)
set.seed(101010011)
sce <- denoisePCA(sce, technical=dec.sce, subset.row=top.sce)
#sce <- runTSNE(sce, dimred="PCA")
sce <- runUMAP(sce, dimred="PCA",
metric = "cosine",
min_dist = 0.3,
n_neighbors = 30)
snn.gr <- buildSNNGraph(sce, use.dimred="PCA", k=25)
set.seed(2)
colLabels(sce) <- factor(igraph::cluster_leiden(snn.gr, resolution_parameter = 1.8)$membership)
table(colLabels(sce))##
## 1 2 3 4 5 6 7 8 9
## 542 795 521 191 469 88 510 105 4
sce <- sce[, sce$label != 9]
sce$label <- droplevels(sce$label)Define custom colors:
cluster_colors = c(
`1` = '#fdbf6f',
`2` = '#b2df8a',
`3` = '#33a02c',
`4` = '#fb9a99',
`5` = '#e31a1c',
`6` = '#a6cee3',
`7` = '#1f78b4',
`8` = '#ff7f00'
)plotUMAP(sce, colour_by="label", text_by="label") + coord_fixed() +
ggh4x::force_panelsizes(rows = unit(4, "in"),
cols = unit(4, "in")) +
scale_color_manual(values = cluster_colors)## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotUMAP(sce, colour_by="label", other_fields="Sample") + coord_fixed() +
facet_wrap(~Sample) +
ggh4x::force_panelsizes(rows = unit(2, "in"),
cols = unit(2, "in")) +
scale_color_manual(values = cluster_colors)## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotTableBarplot <- function(x, y, label_x, label_y) {
tbl <- table(x, y)
m <- prop.table(tbl, margin = 1)*100
m <- as.data.frame(m)
ggplot(m, aes(x=x, y=Freq, fill=y)) +
geom_bar(stat = "identity",
color="#444444") +
xlab(label_x) +
ylab(paste( "%" , label_y)) +
guides(fill=guide_legend(title=label_y)) +
ggthemes::theme_few() +
#scale_fill_manual(values = scater:::.get_palette("tableau10medium"))
scale_fill_manual(values = cluster_colors)
}
plotTableBarplot(x = sce$Sample,
y = sce$label,
label_x = "Sample",
label_y = "Cluster") +
ggh4x::force_panelsizes(rows = unit(4, "in"),
cols = unit(2, "in")) +
xlab('')MAGIC
library(magieR)
sce <- magieR(sce, n.jobs=14)We copy the rowData, colData and UMAP to the magic dataset
rowData(altExp(sce, "magic")) <- rowData(sce)
colData(altExp(sce, "magic")) <- colData(sce)
reducedDim(altExp(sce, "magic"), "UMAP") <- reducedDim(sce, "UMAP")Marker genes
Plot some marker genes to get an idea about the cell types
library(patchwork)
genes <- c("Cd4" , "Cd8a", "Foxp3", "Pdcd1", "Tcf7", "Adrb1", "Adrb2",
"Havcr2", "Cd101", "Cx3cr1", "Cxcr6", "Slamf6", "Entpd1", "Gata3", "Il2ra", "Mki67",
"Tox", "Cxcr3", "Gzmb", "Tbx21", "Cd44", "Klrk1", "Prf1", "Lag3",
"Sell", "Itgae", "Klrg1", "Bcl6", "Cxcr5", "Tnfrsf4", "Cd200", "Itga1",
"Ifng", "Tnf", "Gzma", "Gzmb", "Prf1", "Il2", 'Ikzf2', "Klf3", "Runx3",
"Klf2", "Nr4a1", "S1pr1", "Cd69", "Zfp683", "Prdm1")
genes = unique(genes)
genes = sort(genes)
plots <- purrr::map(genes, ~ {
p_umap <- plotUMAP(altExp(sce),
swap_rownames = "Symbol",
colour_by = .x) +
ggtitle(.x) +
ggthemes::theme_few() +
coord_fixed() +
theme(legend.position = c(0.8, 0.2),
#legend.spacing.x = unit(0, "mm"),
legend.spacing.y = unit(0, "mm"),
legend.title = element_blank(),
legend.background = element_rect(fill=alpha('white', 0.4)),
legend.text = element_text(size=8),
plot.title = element_text(face = "bold.italic"))
#p_umap
p_violin <- plotExpression(
altExp(sce),
features = .x,
swap_rownames = "Symbol",
colour_by = "label",
x = "label"
) +
ggthemes::theme_few() +
xlab("Cluster") +
ylab("Expression") +
theme(strip.background = element_blank(),
strip.text = element_blank(),
legend.position = "none") +
scale_color_manual(values = cluster_colors)
list(p_umap, p_violin)
})
plots <- unlist(plots, recursive = FALSE)
plot_cluster <- plotUMAP(sce, colour_by="label", text_by="label") + ggthemes::theme_few() +
ggtitle("Cluster") +
theme(legend.position = "none",
plot.title = element_text(face = "bold")) +
scale_color_manual(values = cluster_colors) +
coord_fixed()
plots[[length(plots) + 1]] = plot_cluster
p <- patchwork::wrap_plots(plots, ncol = 8) & theme(legend.key.size = unit(2.5, 'mm'))
pHeatmap of DE-genes
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ComplexHeatmap)
markers.up <- purrr::map(unique(colLabels(sce)), ~{
res = findMarkers(sce,
groups = colLabels(sce) == .x,
pval.type="all", direction="up", row.data=rowData(sce),
lfc = 0.1,
add.summary = TRUE,
test.type = "binom")
res$`TRUE`
})
markers.up## [[1]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000035042 ENSMUSG00000035042 Ccl5 Gene Expression 4.950571
## ENSMUSG00000004612 ENSMUSG00000004612 Nkg7 Gene Expression 2.695000
## ENSMUSG00000042385 ENSMUSG00000042385 Gzmk Gene Expression 0.343448
## ENSMUSG00000053977 ENSMUSG00000053977 Cd8a Gene Expression 1.722286
## ENSMUSG00000048521 ENSMUSG00000048521 Cxcr6 Gene Expression 0.885217
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000035042 0.9524074 0.970480 0.2952594 7.48249e-79
## ENSMUSG00000004612 0.9748792 0.837638 0.3613289 6.59609e-38
## ENSMUSG00000042385 0.0390605 0.145756 0.0238895 9.53273e-24
## ENSMUSG00000053977 0.7068299 0.649446 0.3184024 2.77388e-22
## ENSMUSG00000048521 0.2640694 0.357934 0.1414707 2.74553e-20
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000035042 2.41580e-74 1.71461 1.71461
## ENSMUSG00000004612 1.06481e-33 1.21161 1.21161
## ENSMUSG00000042385 1.02591e-19 2.57822 2.57822
## ENSMUSG00000053977 2.23894e-18 1.02693 1.02693
## ENSMUSG00000048521 1.77285e-16 1.33537 1.33537
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[2]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000023927 ENSMUSG00000023927 Satb1 Gene Expression 1.937062
## ENSMUSG00000045092 ENSMUSG00000045092 S1pr1 Gene Expression 1.177296
## ENSMUSG00000026581 ENSMUSG00000026581 Sell Gene Expression 1.116014
## ENSMUSG00000027985 ENSMUSG00000027985 Lef1 Gene Expression 1.019217
## ENSMUSG00000020846 ENSMUSG00000020846 Rflnb Gene Expression 0.435921
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000023927 0.735486 0.613836 0.3211047 1.23084e-22
## ENSMUSG00000045092 0.413271 0.411321 0.1908491 3.88949e-21
## ENSMUSG00000026581 0.481122 0.391195 0.2081616 2.24428e-14
## ENSMUSG00000027985 0.423690 0.364780 0.1986810 1.14798e-12
## ENSMUSG00000020846 0.128670 0.161006 0.0630668 1.30577e-12
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000023927 3.97389e-18 0.933482 0.933482
## ENSMUSG00000045092 6.27880e-17 1.105321 1.105321
## ENSMUSG00000026581 2.41529e-10 0.908175 0.908175
## ENSMUSG00000027985 8.43159e-09 0.874524 0.874524
## ENSMUSG00000020846 8.43159e-09 1.343583 1.343583
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[3]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000026581 ENSMUSG00000026581 Sell Gene Expression 1.54562
## ENSMUSG00000000782 ENSMUSG00000000782 Tcf7 Gene Expression 1.92293
## ENSMUSG00000027985 ENSMUSG00000027985 Lef1 Gene Expression 1.25667
## ENSMUSG00000023927 ENSMUSG00000023927 Satb1 Gene Expression 2.02189
## ENSMUSG00000026989 ENSMUSG00000026989 Dapl1 Gene Expression 0.72507
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000026581 0.462654 0.608445 0.1848148 6.27945e-49
## ENSMUSG00000000782 0.724573 0.727447 0.2844444 4.49905e-39
## ENSMUSG00000027985 0.438306 0.518234 0.1859259 7.69630e-33
## ENSMUSG00000023927 0.841055 0.735125 0.3274074 1.92378e-30
## ENSMUSG00000026989 0.182960 0.295585 0.0766667 7.41670e-30
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000026581 2.02738e-44 1.71568 1.71568
## ENSMUSG00000000782 7.26282e-35 1.35278 1.35278
## ENSMUSG00000027985 8.28275e-29 1.47579 1.47579
## ENSMUSG00000023927 1.55278e-26 1.16539 1.16539
## ENSMUSG00000026989 4.78911e-26 1.93829 1.93829
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[4]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type
## <character> <character> <character>
## ENSMUSG00000022584 ENSMUSG00000022584 Ly6c2 Gene Expression
## ENSMUSG00000032666 ENSMUSG00000032666 1700025G04Rik Gene Expression
## ENSMUSG00000057329 ENSMUSG00000057329 Bcl2 Gene Expression
## ENSMUSG00000004612 ENSMUSG00000004612 Nkg7 Gene Expression
## ENSMUSG00000024910 ENSMUSG00000024910 Ctsw Gene Expression
## ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression
## self.average other.average self.detected other.detected
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000022584 1.552598 0.294772 0.596859 0.117822
## ENSMUSG00000032666 0.318471 0.054631 0.193717 0.029703
## ENSMUSG00000057329 1.398871 0.634577 0.654450 0.274257
## ENSMUSG00000004612 2.232064 1.203323 0.858639 0.415182
## ENSMUSG00000024910 0.995101 0.446038 0.518325 0.217492
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 0
## ENSMUSG00000095523 0 0 0 0
## ENSMUSG00000095475 0 0 0 0
## ENSMUSG00000094855 0 0 0 0
## ENSMUSG00000095019 0 0 0 0
## p.value FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000022584 1.41094e-35 4.55536e-31 2.33470 2.33470
## ENSMUSG00000032666 2.16899e-15 3.50141e-11 2.68004 2.68004
## ENSMUSG00000057329 2.63198e-14 2.83254e-10 1.25286 1.25286
## ENSMUSG00000004612 2.07575e-13 1.67544e-09 1.04720 1.04720
## ENSMUSG00000024910 1.20218e-11 7.76269e-08 1.25051 1.25051
## ... ... ... ... ...
## ENSMUSG00000095763 1 1 0 0
## ENSMUSG00000095523 1 1 0 0
## ENSMUSG00000095475 1 1 0 0
## ENSMUSG00000094855 1 1 0 0
## ENSMUSG00000095019 1 1 0 0
##
## [[5]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000053310 ENSMUSG00000053310 Nrgn Gene Expression 1.231789
## ENSMUSG00000026475 ENSMUSG00000026475 Rgs16 Gene Expression 1.216841
## ENSMUSG00000030124 ENSMUSG00000030124 Lag3 Gene Expression 1.049661
## ENSMUSG00000026826 ENSMUSG00000026826 Nr4a2 Gene Expression 0.989286
## ENSMUSG00000041515 ENSMUSG00000041515 Irf8 Gene Expression 0.977445
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000053310 0.064212 0.575693 0.0301599 1.80962e-143
## ENSMUSG00000026475 0.102185 0.590618 0.0483285 2.91712e-124
## ENSMUSG00000030124 0.132325 0.582090 0.0639535 8.45937e-106
## ENSMUSG00000026826 0.118990 0.509595 0.0559593 7.01253e-93
## ENSMUSG00000041515 0.137297 0.535181 0.0690407 1.26588e-88
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000053310 5.84254e-139 4.22675 4.22675
## ENSMUSG00000026475 4.70910e-120 3.59438 3.59438
## ENSMUSG00000030124 9.10398e-102 3.17374 3.17374
## ENSMUSG00000026826 5.66017e-89 3.17274 3.17274
## ENSMUSG00000041515 8.17407e-85 2.94326 2.94326
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[6]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000029075 ENSMUSG00000029075 Tnfrsf4 Gene Expression 1.869825
## ENSMUSG00000026770 ENSMUSG00000026770 Il2ra Gene Expression 0.977577
## ENSMUSG00000040204 ENSMUSG00000040204 Pclaf Gene Expression 0.889277
## ENSMUSG00000037706 ENSMUSG00000037706 Cd81 Gene Expression 0.783902
## ENSMUSG00000028832 ENSMUSG00000028832 Stmn1 Gene Expression 1.174229
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000029075 0.316400 0.863636 0.1461858 1.46069e-29
## ENSMUSG00000026770 0.130271 0.568182 0.0683051 1.07582e-25
## ENSMUSG00000040204 0.137652 0.568182 0.0692627 1.84186e-25
## ENSMUSG00000037706 0.133870 0.522727 0.0683051 1.73813e-22
## ENSMUSG00000028832 0.248327 0.659091 0.1200128 1.35741e-21
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000029075 4.71598e-25 2.55754 2.55754
## ENSMUSG00000026770 1.73669e-21 3.04481 3.04481
## ENSMUSG00000040204 1.98221e-21 3.02490 3.02490
## ENSMUSG00000037706 1.40293e-18 2.92465 2.92465
## ENSMUSG00000028832 8.76506e-18 2.45120 2.45120
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[7]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000029075 ENSMUSG00000029075 Tnfrsf4 Gene Expression 1.197670
## ENSMUSG00000055435 ENSMUSG00000055435 Maf Gene Expression 1.013128
## ENSMUSG00000039521 ENSMUSG00000039521 Foxp3 Gene Expression 0.601526
## ENSMUSG00000025997 ENSMUSG00000025997 Ikzf2 Gene Expression 1.505087
## ENSMUSG00000002489 ENSMUSG00000002489 Tiam1 Gene Expression 0.340345
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000029075 0.2010385 0.472549 0.1080782 1.28356e-52
## ENSMUSG00000055435 0.1822689 0.413725 0.0922169 1.70516e-47
## ENSMUSG00000039521 0.0532864 0.256863 0.0328292 4.19128e-46
## ENSMUSG00000025997 0.3271730 0.505882 0.1552932 7.91963e-40
## ENSMUSG00000002489 0.0430825 0.172549 0.0276651 3.28875e-27
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000029075 4.14409e-48 2.12202 2.12202
## ENSMUSG00000055435 2.75264e-43 2.15805 2.15805
## ENSMUSG00000039521 4.51066e-42 2.94440 2.94440
## ENSMUSG00000025997 6.39233e-36 1.69982 1.69982
## ENSMUSG00000002489 2.12361e-23 2.61403 2.61403
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
##
## [[8]]
## DataFrame with 32286 rows and 11 columns
## ID Symbol Type self.average
## <character> <character> <character> <numeric>
## ENSMUSG00000040204 ENSMUSG00000040204 Pclaf Gene Expression 1.76797
## ENSMUSG00000017716 ENSMUSG00000017716 Birc5 Gene Expression 1.19243
## ENSMUSG00000031004 ENSMUSG00000031004 Mki67 Gene Expression 1.63012
## ENSMUSG00000069272 ENSMUSG00000069272 Hist1h2ae Gene Expression 1.26500
## ENSMUSG00000094777 ENSMUSG00000094777 Hist1h2ap Gene Expression 1.77379
## ... ... ... ... ...
## ENSMUSG00000095763 ENSMUSG00000095763 AC124606.2 Gene Expression 0
## ENSMUSG00000095523 ENSMUSG00000095523 AC124606.1 Gene Expression 0
## ENSMUSG00000095475 ENSMUSG00000095475 AC133095.2 Gene Expression 0
## ENSMUSG00000094855 ENSMUSG00000094855 AC133095.1 Gene Expression 0
## ENSMUSG00000095019 ENSMUSG00000095019 AC234645.1 Gene Expression 0
## other.average self.detected other.detected p.value
## <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000040204 0.1039424 0.761905 0.0600128 1.15467e-50
## ENSMUSG00000017716 0.0685681 0.647619 0.0430039 4.50611e-47
## ENSMUSG00000031004 0.1325442 0.714286 0.0718870 1.36437e-41
## ENSMUSG00000069272 0.0537482 0.523810 0.0304878 1.06784e-40
## ENSMUSG00000094777 0.1016822 0.600000 0.0503851 5.98853e-39
## ... ... ... ... ...
## ENSMUSG00000095763 0 0 0 1
## ENSMUSG00000095523 0 0 0 1
## ENSMUSG00000095475 0 0 0 1
## ENSMUSG00000094855 0 0 0 1
## ENSMUSG00000095019 0 0 0 1
## FDR summary.logFC logFC.FALSE
## <numeric> <numeric> <numeric>
## ENSMUSG00000040204 3.72798e-46 3.65259 3.65259
## ENSMUSG00000017716 7.27422e-43 3.89331 3.89331
## ENSMUSG00000031004 1.46834e-37 3.30154 3.30154
## ENSMUSG00000069272 8.61905e-37 4.07536 4.07536
## ENSMUSG00000094777 3.86691e-35 3.55771 3.55771
## ... ... ... ...
## ENSMUSG00000095763 1 0 0
## ENSMUSG00000095523 1 0 0
## ENSMUSG00000095475 1 0 0
## ENSMUSG00000094855 1 0 0
## ENSMUSG00000095019 1 0 0
chosen <- lapply(markers.up, function(x) {
x %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
filter(!grepl("^(Rps|Rpl)", Symbol)) %>%
filter(self.detected > 0.1) %>%
slice(n=1:10) %>%
dplyr::select(rowname)
})
chosen <- data.table::rbindlist(chosen, idcol="cluster")
chosen$cluster <- as.factor(chosen$cluster)
chosen$symbol <- rowData(sce)[chosen$rowname, "Symbol"]
m <- as.matrix(logcounts(sce)[chosen$rowname,])
n_cluster <- length(unique(colLabels(sce)))
colors = as.character(paletteer::paletteer_c("viridis::cividis", n=n_cluster))
names(colors) = unique(colLabels(sce))
col_fun = circlize::colorRamp2(seq(-2,2, length.out = 9),
rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9)))
Heatmap(t(scale(t(m))),
use_raster = TRUE,
cluster_rows = FALSE,
cluster_columns = FALSE,
column_split = colLabels(sce),
row_split = chosen$cluster,
row_labels = chosen$symbol,
row_names_gp = grid::gpar(fontsize = 8),
top_annotation = HeatmapAnnotation(
cluster=colLabels(sce),
col = list(
cluster = cluster_colors
)
),
name = "z-score",
col = col_fun,
show_column_names = FALSE)## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
Annotate with Hudson
# create a bulk reference dds object
library(tximeta)
library(BiocParallel)
library(SingleR)
anno <- readr::read_csv(file = "reference_hudson/20200410_Hudson_Anno.csv")## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat.geom
## Rows: 24 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): SRR_no, Subset_time, Subset, Dataset, PMID, Cells
## dbl (1): day
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
anno$files <- file.path("reference_hudson/RNA/", anno$SRR_no, "quant.sf")
anno$names <- anno$SRR_no
se <- tximeta(anno)## importing quantifications
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## found matching transcriptome:
## [ GENCODE - Mus musculus - release M24 ]
## loading existing TxDb created: 2023-08-17 05:33:26
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## loading existing transcript ranges created: 2023-08-17 05:33:27
## fetching genome info for GENCODE
gse <- summarizeToGene(se)## loading existing TxDb created: 2023-08-17 05:33:26
## obtaining transcript-to-gene mapping from database
## loading existing gene ranges created: 2023-08-17 05:33:31
## summarizing abundance
## summarizing counts
## summarizing length
rownames(gse) = gsub("\\..*$", "", rownames(gse))
pred <- SingleR(test = sce, ref = gse,
labels = gse$Subset,
assay.type.test=1,
assay.type.ref=1,
BPPARAM = MulticoreParam())
plotScoreHeatmap(pred,
annotation_col=as.data.frame(colData(sce)[,"label",drop=FALSE]))sce$hudson_labels <- pred$pruned.labelscd8 <- sce$label %in% c(1,3,4,5,8)
tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
"#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
"#CDCC5D", "#6DCCDA")
p_husdon_1 <- plotUMAP(sce[,cd8], colour_by = "hudson_labels") +
scale_color_manual(values = c(
`Transitory` = tableau10medium[1],
`Stem-like` = tableau10medium[3],
`Exhausted` = tableau10medium[4],
`Naive` = tableau10medium[8]
))## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p_husdon_1plotUMAP(sce[,cd8], colour_by = "hudson_labels",
point_alpha=0.3) +
scale_color_manual(values = c(
`Transitory` = tableau10medium[1],
`Stem-like` = tableau10medium[3],
`Exhausted` = tableau10medium[4],
`Naive` = tableau10medium[8]
)) +
geom_density_2d(aes(colour=colour_by),
contour_var = "ndensity")## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotTableBarplot(x = sce[,cd8]$Sample,
y = sce[,cd8]$hudson_labels,
label_x = "Sample",
label_y = "Hudson label") +
scale_fill_manual(values = c(
`Transitory` = tableau10medium[1],
`Stem-like` = tableau10medium[3],
`Exhausted` = tableau10medium[4],
`Naive` = tableau10medium[8]
))## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
T cell function
Pathway analysis with AUCell
AUCell
library(AUCell)
cell_rankings <- AUCell::AUCell_buildRankings(logcounts(sce), nCores = 1)## Quantiles for the number of genes detected by cell:
## (Non-detected genes are shuffled at the end of the ranking. Keep it in mind when choosing the threshold for calculating the AUC).
## min 1% 5% 10% 50% 100%
## 339 369 408 449 672 4163
TRM signature
Milner = read.csv(file = "gene_lists/2021-04-22 Core Trm signature_Milner et al Nature 2017_vIL_corrected.txt",
header = FALSE)[[1]]
trm <- list(
Milner = rownames(sce)[match(Milner, rowData(sce)$Symbol)]
)
cell_AUC_trm <- AUCell_calcAUC(trm, cell_rankings, aucMaxRank=nrow(cell_rankings)*0.05,
verbose=FALSE, nCores=10)
## add to colData
colData(sce) <- cbind(colData(sce), t(getAUC(cell_AUC_trm)))
plotColData(sce, x="Sample", y="Milner", colour_by = "Sample") +
geom_boxplot(outlier.shape = NA,
width = .2,
alpha= .4) +
ggh4x::force_panelsizes(rows = unit(4, "in"),
cols = unit(2, "in"))Exhaustion signature
exh <- read.csv("gene_lists/exhaustion_gene_lists.csv")
exh <- list(
Wherry = na.omit(rownames(sce)[match(na.omit(exh$Wherry), rowData(sce)$Symbol)])
)
cell_AUC_exh <- AUCell_calcAUC(exh, cell_rankings, aucMaxRank=nrow(cell_rankings)*0.05,
verbose=FALSE, nCores=10)
## add to colData
colData(sce) <- cbind(colData(sce), t(getAUC(cell_AUC_exh)))
plotColData(sce, x="Sample", y="Wherry", colour_by = "Sample") +
geom_boxplot(outlier.shape = NA,
width = .2,
alpha= .4) +
ggh4x::force_panelsizes(rows = unit(4, "in"),
cols = unit(2, "in"))Synergistic effects of ICB and Propranolol
CD8 T cells
To test that, we compare ICB vs IgG2a, Propranolol vs IgG2a and Propranolol_ICB vs IgG2a.
markers <- findMarkers(sce[,cd8], direction="up", groups=sce[,cd8]$Sample, full.stats=TRUE,
test.type="wilcox")
markers_up <- list(
ICB = rownames(subset(markers$ICB$stats.IgG2A, log.FDR < log(0.05))),
Propranolol = rownames(subset(markers$Propranolol$stats.IgG2A, log.FDR < log(0.05))),
Propranolol_ICB = rownames(subset(markers$Propranolol_ICB$stats.IgG2A, log.FDR < log(0.05)))
)
list_to_upset_dataframe <- function(l) {
u = unique(unlist(l))
df = data.frame(gene = u)
for(n in names(l))
df[[n]] = u %in% l[[n]]
df
}
venn_df = list_to_upset_dataframe(markers_up)
ensembl2gene = as.data.frame(rowData(sce))[,1:2]
venn_df %>%
dplyr::left_join(ensembl2gene, by = c("gene" = "ID")) %>%
arrange(desc(Propranolol_ICB), desc(Propranolol), desc(ICB)) %>%
dplyr::select(Symbol, everything()) %>%
DT::datatable(
rownames = FALSE,
filter = "top",
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel')
)
)Session Info
sessionInfo()## R version 4.1.3 (2022-03-10)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora Linux 36 (Container Image)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libflexiblas.so.3.3
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices datasets
## [8] utils methods base
##
## other attached packages:
## [1] AUCell_1.14.0 GenomicFeatures_1.44.2
## [3] AnnotationDbi_1.54.1 SingleR_1.6.1
## [5] tximeta_1.10.0 dplyr_1.0.7
## [7] ComplexHeatmap_2.8.0 patchwork_1.1.1
## [9] magieR_0.1.0 BiocParallel_1.26.2
## [11] scran_1.20.1 scater_1.20.1
## [13] ggplot2_3.3.5 scuttle_1.2.1
## [15] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [17] Biobase_2.52.0 GenomicRanges_1.44.0
## [19] GenomeInfoDb_1.28.2 IRanges_2.26.0
## [21] S4Vectors_0.30.0 BiocGenerics_0.38.0
## [23] MatrixGenerics_1.4.3 matrixStats_0.60.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.52.1
## [3] scattermore_0.7 ggthemes_4.2.4
## [5] R.methodsS3_1.8.1 SeuratObject_4.0.2
## [7] tidyr_1.1.3 bit64_4.0.5
## [9] knitr_1.33 R.utils_2.10.1
## [11] irlba_2.3.3 DelayedArray_0.18.0
## [13] data.table_1.14.0 rpart_4.1-15
## [15] AnnotationFilter_1.16.0 KEGGREST_1.32.0
## [17] RCurl_1.98-1.4 doParallel_1.0.16
## [19] generics_0.1.0 ScaledMatrix_1.0.0
## [21] cowplot_1.1.1 RSQLite_2.2.8
## [23] RANN_2.6.1 future_1.22.1
## [25] tzdb_0.1.2 bit_4.0.4
## [27] xml2_1.3.2 spatstat.data_2.1-0
## [29] httpuv_1.6.2 isoband_0.2.5
## [31] assertthat_0.2.1 viridis_0.6.1
## [33] tximport_1.20.0 xfun_0.25
## [35] hms_1.1.0 jquerylib_0.1.4
## [37] evaluate_0.14 promises_1.2.0.1
## [39] restfulr_0.0.13 fansi_0.5.0
## [41] progress_1.2.2 dbplyr_2.1.1
## [43] igraph_1.2.6.9118 DBI_1.1.1
## [45] htmlwidgets_1.5.3 spatstat.geom_2.2-2
## [47] paletteer_1.4.0 purrr_0.3.4
## [49] ellipsis_0.3.2 crosstalk_1.1.1
## [51] RSpectra_0.16-0 bookdown_0.23
## [53] annotate_1.70.0 prismatic_1.0.0
## [55] biomaRt_2.48.3 deldir_0.2-10
## [57] sparseMatrixStats_1.4.2 vctrs_0.3.8
## [59] ensembldb_2.16.4 here_1.0.1
## [61] Cairo_1.5-12.2 ROCR_1.0-11
## [63] abind_1.4-5 cachem_1.0.6
## [65] withr_2.4.2 ggh4x_0.2.1
## [67] vroom_1.5.4 sctransform_0.3.2
## [69] GenomicAlignments_1.28.0 prettyunits_1.1.1
## [71] goftest_1.2-2 cluster_2.1.2
## [73] dir.expiry_1.0.0 lazyeval_0.2.2
## [75] crayon_1.4.1 basilisk.utils_1.4.0
## [77] edgeR_3.34.0 pkgconfig_2.0.3
## [79] labeling_0.4.2 ProtGenerics_1.24.0
## [81] nlme_3.1-152 vipor_0.4.5
## [83] rlang_0.4.11 globals_0.14.0
## [85] lifecycle_1.0.0 miniUI_0.1.1.1
## [87] filelock_1.0.2 BiocFileCache_2.0.0
## [89] rsvd_1.0.5 AnnotationHub_3.0.1
## [91] rprojroot_2.0.2 polyclip_1.10-0
## [93] lmtest_0.9-38 graph_1.70.0
## [95] Matrix_1.3-4 zoo_1.8-9
## [97] beeswarm_0.4.0 pheatmap_1.0.12
## [99] ggridges_0.5.3 GlobalOptions_0.1.2
## [101] png_0.1-7 viridisLite_0.4.0
## [103] rjson_0.2.20 bitops_1.0-7
## [105] R.oo_1.24.0 KernSmooth_2.23-20
## [107] Biostrings_2.60.2 blob_1.2.2
## [109] DelayedMatrixStats_1.14.3 shape_1.4.6
## [111] stringr_1.4.0 parallelly_1.27.0
## [113] readr_2.0.1 beachmat_2.8.1
## [115] scales_1.1.1 GSEABase_1.54.0
## [117] memoise_2.0.0 magrittr_2.0.1
## [119] plyr_1.8.6 ica_1.0-2
## [121] zlibbioc_1.38.0 compiler_4.1.3
## [123] BiocIO_1.2.0 dqrng_0.3.0
## [125] RColorBrewer_1.1-2 clue_0.3-59
## [127] fitdistrplus_1.1-5 cli_3.0.1
## [129] Rsamtools_2.8.0 XVector_0.32.0
## [131] listenv_0.8.0 pbapply_1.4-3
## [133] MASS_7.3-54 mgcv_1.8-36
## [135] tidyselect_1.1.1 stringi_1.7.4
## [137] highr_0.9 yaml_2.2.1
## [139] BiocSingular_1.8.1 locfit_1.5-9.4
## [141] ggrepel_0.9.1 sass_0.4.0
## [143] tools_4.1.3 future.apply_1.8.1
## [145] rstudioapi_0.13 circlize_0.4.13
## [147] bluster_1.2.1 foreach_1.5.1
## [149] metapod_1.0.0 gridExtra_2.3
## [151] rmdformats_1.0.2 farver_2.1.0
## [153] Rtsne_0.15 digest_0.6.27
## [155] BiocManager_1.30.16 shiny_1.6.0
## [157] Rcpp_1.0.7 BiocVersion_3.13.1
## [159] later_1.3.0 RcppAnnoy_0.0.19
## [161] httr_1.4.2 colorspace_2.0-2
## [163] XML_3.99-0.7 tensor_1.5
## [165] reticulate_1.24 splines_4.1.3
## [167] uwot_0.1.10 statmod_1.4.36
## [169] rematch2_2.1.2 spatstat.utils_2.2-0
## [171] basilisk_1.4.0 renv_0.15.4
## [173] plotly_4.9.4.1 xtable_1.8-4
## [175] jsonlite_1.7.2 R6_2.5.1
## [177] pillar_1.6.2 htmltools_0.5.2
## [179] mime_0.11 DT_0.18
## [181] glue_1.4.2 fastmap_1.1.0
## [183] BiocNeighbors_1.10.0 interactiveDisplayBase_1.30.0
## [185] codetools_0.2-18 utf8_1.2.2
## [187] lattice_0.20-44 bslib_0.2.5.1
## [189] spatstat.sparse_2.0-0 tibble_3.1.4
## [191] curl_4.3.2 ggbeeswarm_0.6.0
## [193] leiden_0.3.9 survival_3.2-11
## [195] limma_3.48.3 rmarkdown_2.10
## [197] munsell_0.5.0 GetoptLong_1.0.5
## [199] GenomeInfoDbData_1.2.6 iterators_1.0.13
## [201] reshape2_1.4.4 gtable_0.3.0
## [203] spatstat.core_2.3-0 Seurat_4.0.4